import re
from collections import Counter

# Report settings live in one place so the word limit, the output file name
# and the report title can never disagree with each other.
INPUT_PATH = "bible.txt"
TOP_N = 6000
OUTPUT_PATH = f"bible_top_{TOP_N}_words.md"

# A word is a run of word characters excluding digits and underscores,
# optionally joined by internal apostrophes ("lord's", "don't").  Plain
# ``\w+`` would also count verse numbers such as "1:1" as words and would
# split "lord's" into "lord" and a stray "s".
WORD_PATTERN = re.compile(r"[^\W\d_]+(?:['’][^\W\d_]+)*")


def extract_words(text):
    """Return the lowercased words of *text* in order of appearance.

    Digits, underscores and punctuation never form part of a word; an
    apostrophe is kept only when it sits between two letter runs.
    """
    return WORD_PATTERN.findall(text.lower())


def top_words(text, limit=TOP_N):
    """Return the *limit* most frequent words of *text*.

    The result is a list of ``(word, count)`` pairs sorted by descending
    count, as produced by ``Counter.most_common``.
    """
    return Counter(extract_words(text)).most_common(limit)


def render_markdown(ranked, limit=TOP_N):
    """Build the Markdown report for the ``(word, count)`` pairs in *ranked*.

    *limit* is only used in the title, so that the heading always states the
    same number that was used to select the words.
    """
    parts = [
        f"# 圣经中最常用的{limit}个英语单词\n\n",
        "按使用频率排序：\n\n",
    ]
    # Ranks are 1-based; each entry is "<rank>. <word> <count> ".
    parts.extend(
        f"{rank}. {word} {count} \n"
        for rank, (word, count) in enumerate(ranked, start=1)
    )
    return "".join(parts)


def main():
    """Read the corpus, rank its words and write the Markdown report."""
    with open(INPUT_PATH, "r", encoding="utf-8") as source:
        corpus = source.read()

    report = render_markdown(top_words(corpus))

    with open(OUTPUT_PATH, "w", encoding="utf-8") as md_file:
        md_file.write(report)

    print(f"Markdown文件已生成：{OUTPUT_PATH}")


if __name__ == "__main__":
    main()
